import numpy as np
import scipy.stats as stats
import os

# Goodness-of-fit screening of gridded precipitation series.
#
# For every ``*.txt`` file in DATA_DIR the script fits DISTRIBUTION to the
# series, computes an Anderson-Darling statistic and an approximate p-value,
# groups the grid points by latitude band and by whether p is above or below
# ALPHA, and finally writes the accepted "shannan" points to the Desktop.

# Directory holding one series per grid point.
DATA_DIR = r'E:\A_experimentNeed\Use_data\GWC_spaceGwc\space_GWC04-09'

# Threshold separating accepted fits (p > ALPHA) from rejected ones (p < ALPHA).
# The printed labels below are hard-coded to "0.05"; update them if this changes.
ALPHA = 0.05

# Distribution under test.  Swap for stats.norm, stats.lognorm,
# stats.genextreme, stats.gumbel_l or stats.beta to compare candidates.
DISTRIBUTION = stats.pearson3

REGIONS = ('shanbei', 'guanzhong', 'shannan')


def calculate_AD(data, params, dist=stats.pearson3):
    """Return the Anderson-Darling statistic of *data* for a fitted distribution.

    Args:
        data: Sample values (any array-like; it is flattened and sorted).
        params: Distribution parameters, passed positionally to ``dist.cdf``.
        dist: Object exposing ``cdf(x, *params)``; defaults to Pearson III.

    Returns:
        Tuple ``(AD, Z)`` where ``AD`` is the raw statistic and ``Z`` is ``AD``
        scaled by ``1 + 0.75/n + 2.25/n**2``.

    Raises:
        ValueError: If *data* is empty.
    """
    sorted_data = np.sort(data, axis=None)
    n = sorted_data.size
    if n == 0:
        raise ValueError('calculate_AD requires at least one observation')

    cdf = dist.cdf(sorted_data, *params)
    # The weights are the order-statistic positions 1..n.  Positions are used
    # instead of stats.rankdata because rankdata averages the ranks of tied
    # values, which would mis-weight every tied observation.
    position = np.arange(1, n + 1)
    # cdf[::-1] pairs the i-th smallest value with the i-th largest one.
    AD = -n - (1 / n) * np.sum(
        (2 * position - 1) * (np.log(cdf) + np.log(1 - cdf[::-1]))
    )
    Z = AD * (1 + 0.75 / n + 2.25 / (n ** 2))
    return AD, Z


def p_value_from_z(Z):
    """Convert the adjusted statistic *Z* into an approximate p-value.

    Uses a four-piece exponential approximation with break points at
    0.2, 0.34 and 0.6.

    NOTE(review): the coefficients look like the commonly cited approximation
    for the normal case; confirm it is appropriate for DISTRIBUTION.
    NOTE(review): the top branch is not monotone in Z (its exponent has a
    minimum near Z = 153), so extremely large Z values give growing results.
    """
    if Z >= 0.6:
        return np.exp(1.2937 - 5.709 * Z + 0.0186 * (Z ** 2))
    if Z > 0.34:
        return np.exp(0.9177 - 4.279 * Z - 1.38 * (Z ** 2))
    if Z > 0.2:
        return 1 - np.exp(-8.318 + 42.796 * Z - 59.938 * (Z ** 2))
    # Z <= 0.2 (a NaN statistic also ends up here, as every test above fails).
    return 1 - np.exp(-13.436 + 101.14 * Z - 223.73 * (Z ** 2))


def _region_of(lat):
    """Return the region key for latitude *lat* (degrees)."""
    if 36.0 <= lat <= 40.0:
        return 'shanbei'
    if 34 <= lat < 36:
        return 'guanzhong'
    # Everything else, including latitudes above 40, is filed under shannan.
    return 'shannan'


def main():
    """Run the screening over DATA_DIR and save the accepted shannan points.

    Returns:
        Dict with ``p_values`` (one entry per file), ``accepted`` and
        ``rejected`` (each mapping region key -> list of ``[lat, lon, p]``).
    """
    files_2 = [f for f in os.listdir(DATA_DIR) if f.endswith('.txt')]

    P_value = []
    accepted = {region: [] for region in REGIONS}
    rejected = {region: [] for region in REGIONS}

    for file in files_2:
        precipitation = np.loadtxt(os.path.join(DATA_DIR, file))

        # Fit the candidate distribution, then test the fit.
        params = DISTRIBUTION.fit(precipitation)
        _, Z = calculate_AD(precipitation, params, DISTRIBUTION)
        p = p_value_from_z(Z)
        print('p-value:', p)

        # A p-value exactly equal to ALPHA (or NaN) is recorded in P_value
        # but not assigned to either group.
        if p > ALPHA:
            group = accepted
        elif p < ALPHA:
            group = rejected
        else:
            group = None

        if group is not None:
            # Assumes the file name starts with "<lat:6 chars>?<lon:7 chars>"
            # -- TODO confirm against the naming scheme of the data set.
            lat = float(file[0:6])
            lon = float(file[7:14])
            group[_region_of(lat)].append([lat, lon, p])

        P_value.append(p)

    array = np.array(P_value)
    print("Number of p-values > 0.05:", np.sum(array > ALPHA))
    a = [value for value in array if value > ALPHA]
    print('>0.05', a)

    # Save the accepted shannan points to the user's Desktop.
    file_name = 'shannan.txt'
    file_path = os.path.join(os.path.expanduser('~'), 'Desktop')
    np.savetxt(os.path.join(file_path, file_name), accepted['shannan'],
               fmt='%5.4f', delimiter='  ')

    return {'p_values': P_value, 'accepted': accepted, 'rejected': rejected}


if __name__ == "__main__":
    results = main()